Online-Academy
Look, Read, Understand, Apply

Data Mining And Data Warehousing

Simple Apriori - First Step

A simple Apriori concept

import itertools as it
def candidate_pairs(items):
    """Return every unordered 2-item candidate set drawn from *items*.

    ``itertools.combinations`` is used rather than ``itertools.product`` so
    that self-pairs such as ``('milk', 'milk')`` and mirrored duplicates such
    as ``('bread', 'milk')`` / ``('milk', 'bread')`` are never generated.
    The items are sorted first so the candidates come out in a reproducible
    order.

    Args:
        items: iterable of distinct, sortable single items.

    Returns:
        A list of 2-element sets, one per unordered pair.
    """
    return [set(pair) for pair in it.combinations(sorted(items), 2)]


def support_count(itemset, transactions):
    """Return how many of *transactions* contain every item of *itemset*.

    Args:
        itemset: iterable of items that must all be present.
        transactions: iterable of transactions, each an iterable of items.

    Returns:
        The number of transactions that are supersets of *itemset*.
    """
    # Convert once, outside the scan, instead of once per transaction.
    itemset = set(itemset)
    return sum(1 for transaction in transactions if itemset.issubset(transaction))


if __name__ == "__main__":
    transactions = [
        ['milk', 'bread', 'butter'],
        ['bread', 'butter'],
        ['bread', 'butter', 'milk'],
        ['milk', 'bread'],
        ['milk', 'butter'],
        ['bread', 'butter'],
    ]
    single_items = {'milk', 'bread', 'butter'}
    candidates = candidate_pairs(single_items)

    # First pass: list the candidate 2-itemsets.
    for candidate in candidates:
        print(candidate)

    # Second pass: print each candidate with its support count.
    for candidate in candidates:
        print(candidate, ":", support_count(candidate, transactions))

Simple Bayesian Concept

import pandas as pd

def rows_with_class(df, column, value, label="yes", class_column="classes"):
    """Return the rows where ``column`` equals *value* and the class is *label*.

    The attribute comparison ignores case because the accompanying CSV spells
    the same income level as ``High`` while the other levels are lower-case;
    an exact comparison against ``"high"`` would silently match nothing.

    Args:
        df: DataFrame holding the training records.
        column: name of the (string-valued) attribute column to filter on.
        value: attribute value to look for, compared case-insensitively.
        label: class label the row must carry.
        class_column: name of the class-label column. The CSV header names it
            ``classes``.

    Returns:
        The matching subset of *df*. Missing attribute values never match.
    """
    value_matches = df[column].str.lower() == value.lower()
    return df[value_matches & (df[class_column] == label)]


if __name__ == "__main__":
    df = pd.read_csv("Bayesian-classifier-i.csv")

    # Per-attribute counts among the records labelled "yes".
    age_yes = rows_with_class(df, "age", "youth")
    print(len(age_yes))
    income_yes = rows_with_class(df, "income", "high")
    print(len(income_yes))
    student_yes = rows_with_class(df, "student", "yes")
    print(len(student_yes))
    credit_yes = rows_with_class(df, "credit_rating", "fair")
    print(len(credit_yes))

    # Total number of records labelled "yes".
    class_yes = df[df["classes"] == "yes"]
    print(len(class_yes))

    # The same kind of filter expressed in other pandas styles.
    records = df.query("income=='low' and student=='yes'")
    print(records)
    print(len(records))
    income = "low"
    # "@income" makes query() read the local Python variable.
    print(df.query("income==@income"))
    print(df.loc[(df["income"] == "low") & (df["classes"] == "yes")])
    print(df[df["income"].isin(["low", "medium"])])

Bayesian Classifier - Rough concept

import pandas as pd

def conditional_probability(df, column, value, label="yes", class_column="classes"):
    """Estimate P(column == value | class == label) from the records in *df*.

    The attribute comparison ignores case because the accompanying CSV spells
    the income level ``High`` with a capital letter, so an exact comparison
    against ``'high'`` would always count zero rows.

    Args:
        df: DataFrame holding the training records.
        column: name of the (string-valued) attribute column.
        value: attribute value of interest, compared case-insensitively.
        label: class label to condition on.
        class_column: name of the class-label column.

    Returns:
        The fraction of rows labelled *label* whose *column* equals *value*,
        or ``0.0`` when no row carries *label* (avoids a ZeroDivisionError).
    """
    in_class = df[class_column] == label
    class_total = int(in_class.sum())
    if class_total == 0:
        return 0.0
    value_matches = df[column].str.lower() == value.lower()
    return int((value_matches & in_class).sum()) / class_total


if __name__ == "__main__":

    df = pd.read_csv("Bayesian-classifier-i.csv")
    print(df)
    # Replace missing income values with the placeholder "ok".
    df.fillna({"income": "ok"}, inplace=True)
    print(df)
    print(df["student"])
    print(df.describe())

    # The CSV's age values are youth / middle_age / senior; there is no
    # "junior" category, so the youngest group is queried as "youth".
    pro_total_youth_yes = conditional_probability(df, "age", "youth")
    pro_total_high_yes = conditional_probability(df, "income", "high")
    print(f"pro youth {pro_total_youth_yes}")
    print(f"pro high {pro_total_high_yes}")

    # x = (age='youth' and income='high' and credit_rating='fair')

Bayesian-classifier-i.csv

SN,Name,age,income,student,credit_rating,classes
1,r,youth,High,no,fair,no
2,a,youth,High,no,excellent,no
3,t,middle_age,High,no,fair,yes
4,y,senior,,no,fair,yes
5,m,senior,low,yes,,yes
6,e,senior,low,yes,excellent,no
7,s,middle_age,low,yes,excellent,yes
8,a,youth,medium,no,fair,no
9,r,youth,low,yes,fair,yes
10,t,senior,medium,yes,fair,yes
11,r,youth,medium,yes,excellent,yes
12,a,middle_age,medium,no,excellent,yes
13,r,middle_age,High,yes,fair,yes
14,t,senior,medium,no,excellent,no